R Markdown

This is an R Markdown Test of my new data set from RCDB.

# Load the 2021 US coaster statistics that the analyses below read.
Stats_2021 <- read.csv(file = "./data/US_Coaster_Stats_2021.csv")

Intro

Using RCDB’s data

The pie chart below shows the ten states with the most coasters:

# Pie chart of the ten states with the most rows in Stats_2021.
# table() counts the rows per state; despite its name, the "Percent" column
# holds those raw counts (the name is kept because it is the plotted y
# aesthetic).
column_name <- c("State", "Percent")
topStates <- as.data.frame(table(Stats_2021$State))
colnames(topStates) <- column_name
# NOTE: top_n() keeps ties, so it can return more than 10 rows; the label
# angles further down are hard-coded for exactly 10 slices.
topStates <- top_n(topStates, 10, Percent)
topSum <- sum(topStates$Percent)

# Share of each state among the selected states, as whole percentages.
# The arithmetic is vectorised, so no loop over the rows is needed.
percentages <- round((topStates$Percent / topSum) * 100.0, digits = 0)

piePlot <- ggplot(topStates, aes(x = "", y = Percent, fill = State)) +
  geom_col() +
  # Polar coordinates over the stacked column turn it into a pie.
  coord_polar(theta = "y", start = 0) +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  # One "<State> <share>%" label per slice, centred in the slice and rotated
  # by a hand-picked angle.
  geom_text(
    aes(label = paste0((State), " ", (percentages), "%")),
    position = position_stack(vjust = 0.5),
    angle = c(-60, -13, 20, 50, 70, -80, -50, -10, 25, 70)
  ) +
  ggtitle("Top 10 States With the Most Coasters")

piePlot

1. What are the most popular manufacturers?

# Horizontal bar chart of the ten manufacturers with the most installations
# (rows in Stats_2021 per value of Make).
column_name <- c("Manufacture", "Installations")
topManufactures <- as.data.frame(table(Stats_2021$Make))
colnames(topManufactures) <- column_name
topManufactures <- top_n(topManufactures, 10, Installations)

barplot <- ggplot(
  data = topManufactures,
  mapping = aes(x = Manufacture, y = Installations)
) +
  # One colour per bar. The palette is sized from the data because top_n()
  # keeps ties and may therefore return more than 10 rows.
  geom_bar(
    stat = "identity",
    fill = rainbow(nrow(topManufactures)),
    position = position_dodge()
  ) +
  geom_text(
    aes(label = paste0((Installations), " installs")),
    position = position_stack(vjust = 0.5)
  ) +
  #theme(axis.text.x=element_blank(),
        #axis.ticks.x=element_blank()) +
  coord_flip() +
  ggtitle("Top 10 Most Popular Manufactures")

# width/height are sizing options of ggplotly(); ggplot() ignores them, so
# they are passed here instead.
ggplotly(barplot, width = 700, height = 700, tooltip = "text")

Random Variables of the data set that show the history of coasters. The End of the 19th century…

# Compare how many Intamin and Bolliger & Mabillard coasters are installed in
# the western versus the eastern states listed below.
WestStates <- c(
  "California", "Nevada", "Arizona", "Idaho", "Oregon", "Washington",
  "New Mexico", "Colorado", "Utah", "Montana", "Wyoming"
)
EastStates <- c(
  "Maine", "Massachusetts", "Vermont", "New Hampshire", "Rhode Island",
  "Connecticut", "New York", "Delaware", "Maryland", "Virginia",
  "Pennsylvania", "New Jersey", "North Carolina", "South Carolina",
  "Georgia", "Florida"
)
Makers <- c("Intamin Amusement Rides", "Bolliger & Mabillard")

# Number of rows of Stats_2021 whose State is in `states` and whose Make equals
# `make`. Rows with a missing Make are not counted, matching dplyr::filter().
countInstalls <- function(states, make) {
  sum(Stats_2021$State %in% states & Stats_2021$Make == make, na.rm = TRUE)
}

westCoastIntamin <- countInstalls(WestStates, Makers[1])
eastCoastIntamin <- countInstalls(EastStates, Makers[1])
westCoastBM <- countInstalls(WestStates, Makers[2])
eastCoastBM <- countInstalls(EastStates, Makers[2])

# Long-format table: one row per maker/region combination.
maker <- c(rep("Intamin", 2), rep("Bolliger and Mabillard", 2))
region <- rep(c("West", "East"), 2)
value <- c(westCoastIntamin, eastCoastIntamin, westCoastBM, eastCoastBM)
regionPopulations <- data.frame(maker, region, value)

barplot <- ggplot(
  regionPopulations,
  aes(fill = region, y = value, x = maker)
) +
  geom_bar(position = "stack", stat = "identity") +
  geom_text(
    aes(label = paste0((value), " installs")),
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  ggtitle("Swiss Maker Installs Between Coasts")

# width/height are sizing options of ggplotly(); ggplot() ignores them, so
# they are passed here instead.
ggplotly(barplot, width = 700, height = 700, tooltip = "text")

#Probability of a coaster being made by Intamin given it is on the west coast? #A: Made by Intamin #B. Located on the west coast #P(A | B) = P(A intersect B) / P(B) #P(B) = Total

# Share of each maker/region combination among all Intamin and B&M coasters
# counted in the two regions (the four shares sum to 1 when probTotal is
# non-zero).
# NOTE(review): these are joint shares, P(maker and region). Within this
# two-maker subset, the conditional probability P(Intamin | West) would
# instead be westCoastIntamin / (westCoastIntamin + westCoastBM) -- confirm
# which one is intended.
probTotal <- westCoastIntamin + eastCoastIntamin + westCoastBM + eastCoastBM

probWestIntamin <- westCoastIntamin / probTotal
probEastIntamin <- eastCoastIntamin / probTotal
probWestBM <- westCoastBM / probTotal
probEastBM <- eastCoastBM / probTotal

maker <- c(rep("Intamin", 2), rep("Bolliger and Mabillard", 2))
region <- rep(c("West", "East"), 2)
value <- c(probWestIntamin, probEastIntamin, probWestBM, probEastBM)
# Keep `maker` in the data frame so the x aesthetic is resolved from the plot
# data rather than from a same-named global variable.
regionPopulations <- data.frame(maker, region, value)

barplot <- ggplot(
  regionPopulations,
  aes(fill = region, y = value, x = maker)
) +
  geom_bar(position = "stack", stat = "identity") +
  geom_text(
    aes(label = paste0((value))),
    position = position_stack(vjust = 0.5)
  ) +
  ggtitle("Probability Of Swiss Installs Between Coasts")

# width/height are sizing options of ggplotly(); ggplot() ignores them, so
# they are passed here instead.
ggplotly(barplot, width = 700, height = 700, tooltip = "text")

#Standard Histogram of height

# Simulated height distributions for steel and wooden coasters.
# The observed heights are summarised per material (mean and standard
# deviation, both rounded to whole numbers) and a normal sample is drawn from
# each summary; the histogram shows those simulated values, not the raw data.
stats <- Stats_2021 %>%
  select(Height, Material) %>%
  na.omit()

sample_steel <- stats %>% filter(Material == "Steel")
sample_wood <- stats %>% filter(Material == "Wood")

mean_steel <- round(mean(sample_steel$Height))
mean_wood <- round(mean(sample_wood$Height))
std_steel <- round(sd(sample_steel$Height))
std_wood <- round(sd(sample_wood$Height))
# Both simulated samples use the total number of complete rows, so the two
# materials are drawn with equal size.
sample_size <- nrow(stats)

# Steel is drawn first, then wood, matching the order of the Material labels.
simulated_steel <- rnorm(sample_size, mean = mean_steel, sd = std_steel)
simulated_wood <- rnorm(sample_size, mean = mean_wood, sd = std_wood)

stats <- data.frame(
  Material = factor(rep(c("Steel", "Wood"), each = sample_size)),
  Height = round(c(simulated_steel, simulated_wood))
)

# bins = 30 is the stat_bin() default; stating it explicitly keeps the plot
# unchanged and silences the "Pick better value with `binwidth`" message.
ggplot(stats, aes(y = Height, fill = Material)) +
  geom_histogram(bins = 30) +
  ggtitle("Normal Distribution of US Coaster Height")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Standard Histogram of inversions

# Simulated inversion-count distributions for steel and wooden coasters.
# The observed inversion counts are summarised per material (mean and standard
# deviation, both rounded to whole numbers) and a normal sample is drawn from
# each summary; the histogram shows those simulated values, not the raw data.
stats <- Stats_2021 %>%
  select(Inversions, Material) %>%
  na.omit()

sample_steel <- stats %>% filter(Material == "Steel")
sample_wood <- stats %>% filter(Material == "Wood")

mean_steel <- round(mean(sample_steel$Inversions))
mean_wood <- round(mean(sample_wood$Inversions))
# NOTE: rounding can reduce a small standard deviation to 0, in which case
# rnorm() returns the mean for every draw.
std_steel <- round(sd(sample_steel$Inversions))
std_wood <- round(sd(sample_wood$Inversions))
# Both simulated samples use the total number of complete rows, so the two
# materials are drawn with equal size.
sample_size <- nrow(stats)

# Steel is drawn first, then wood, matching the order of the Material labels.
simulated_steel <- rnorm(sample_size, mean = mean_steel, sd = std_steel)
simulated_wood <- rnorm(sample_size, mean = mean_wood, sd = std_wood)

stats <- data.frame(
  Material = factor(rep(c("Steel", "Wood"), each = sample_size)),
  Inversions = round(c(simulated_steel, simulated_wood))
)

# bins = 30 is the stat_bin() default; stating it explicitly keeps the plot
# unchanged and silences the "Pick better value with `binwidth`" message.
ggplot(stats, aes(y = Inversions, fill = Material)) +
  geom_histogram(bins = 30) +
  ggtitle("Normal Distribution of US Coaster Inversions")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#T Distribution Histogram of inversions

# Simulated inversion-count distributions for steel and wooden coasters.
# NOTE(review): the section heading mentions a t distribution, but this chunk
# draws from rnorm() and is titled "Normal Distribution", exactly like the
# preceding inversions chunk -- confirm whether a t-based draw was intended.
stats <- Stats_2021 %>%
  select(Inversions, Material) %>%
  na.omit()

sample_steel <- stats %>% filter(Material == "Steel")
sample_wood <- stats %>% filter(Material == "Wood")

# Per-material summary statistics, rounded to whole numbers before use.
mean_steel <- round(mean(sample_steel$Inversions))
mean_wood <- round(mean(sample_wood$Inversions))
# NOTE: rounding can reduce a small standard deviation to 0, in which case
# rnorm() returns the mean for every draw.
std_steel <- round(sd(sample_steel$Inversions))
std_wood <- round(sd(sample_wood$Inversions))
# Both simulated samples use the total number of complete rows, so the two
# materials are drawn with equal size.
sample_size <- nrow(stats)

# Steel is drawn first, then wood, matching the order of the Material labels.
simulated_steel <- rnorm(sample_size, mean = mean_steel, sd = std_steel)
simulated_wood <- rnorm(sample_size, mean = mean_wood, sd = std_wood)

stats <- data.frame(
  Material = factor(rep(c("Steel", "Wood"), each = sample_size)),
  Inversions = round(c(simulated_steel, simulated_wood))
)

# bins = 30 is the stat_bin() default; stating it explicitly keeps the plot
# unchanged and silences the "Pick better value with `binwidth`" message.
ggplot(stats, aes(y = Inversions, fill = Material)) +
  geom_histogram(bins = 30) +
  ggtitle("Normal Distribution of US Coaster Inversions")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.